import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Read the project CSV into the DataFrame `p` (used by every later cell) and
# print its column / dtype / non-null summary.
DATASET_CSV = "G:/My Drive/INNO_INTERN/DATASETS/project2.csv"
p = pd.read_csv(DATASET_CSV)
p.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 112634 entries, 0 to 112633 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 VIN (1-10) 112634 non-null object 1 County 112634 non-null object 2 City 112634 non-null object 3 State 112634 non-null object 4 Postal Code 112634 non-null int64 5 Model Year 112634 non-null int64 6 Make 112634 non-null object 7 Model 112614 non-null object 8 Electric Vehicle Type 112634 non-null object 9 Clean Alternative Fuel Vehicle (CAFV) Eligibility 112634 non-null object 10 Electric Range 112634 non-null int64 11 Base MSRP 112634 non-null int64 12 Legislative District 112348 non-null float64 13 DOL Vehicle ID 112634 non-null int64 14 Vehicle Location 112610 non-null object 15 Electric Utility 112191 non-null object 16 2020 Census Tract 112634 non-null int64 dtypes: float64(1), int64(6), object(10) memory usage: 14.6+ MB
# Preview the first five rows of the dataset.
p.head(n=5)
| | VIN (1-10) | County | City | State | Postal Code | Model Year | Make | Model | Electric Vehicle Type | Clean Alternative Fuel Vehicle (CAFV) Eligibility | Electric Range | Base MSRP | Legislative District | DOL Vehicle ID | Vehicle Location | Electric Utility | 2020 Census Tract |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | JTMEB3FV6N | Monroe | Key West | FL | 33040 | 2022 | TOYOTA | RAV4 PRIME | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 42 | 0 | NaN | 198968248 | POINT (-81.80023 24.5545) | NaN | 12087972100 |
| 1 | 1G1RD6E45D | Clark | Laughlin | NV | 89029 | 2013 | CHEVROLET | VOLT | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 38 | 0 | NaN | 5204412 | POINT (-114.57245 35.16815) | NaN | 32003005702 |
| 2 | JN1AZ0CP8B | Yakima | Yakima | WA | 98901 | 2011 | NISSAN | LEAF | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 73 | 0 | 15.0 | 218972519 | POINT (-120.50721 46.60448) | PACIFICORP | 53077001602 |
| 3 | 1G1FW6S08H | Skagit | Concrete | WA | 98237 | 2017 | CHEVROLET | BOLT EV | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 238 | 0 | 39.0 | 186750406 | POINT (-121.7515 48.53892) | PUGET SOUND ENERGY INC | 53057951101 |
| 4 | 3FA6P0SU1K | Snohomish | Everett | WA | 98201 | 2019 | FORD | FUSION | Plug-in Hybrid Electric Vehicle (PHEV) | Not eligible due to low battery range | 26 | 0 | 38.0 | 2006714 | POINT (-122.20596 47.97659) | PUGET SOUND ENERGY INC | 53061041500 |
# Preview the last five rows of the dataset.
p.tail(n=5)
| | VIN (1-10) | County | City | State | Postal Code | Model Year | Make | Model | Electric Vehicle Type | Clean Alternative Fuel Vehicle (CAFV) Eligibility | Electric Range | Base MSRP | Legislative District | DOL Vehicle ID | Vehicle Location | Electric Utility | 2020 Census Tract |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 112629 | 7SAYGDEF2N | King | Duvall | WA | 98019 | 2022 | TESLA | MODEL Y | Battery Electric Vehicle (BEV) | Eligibility unknown as battery range has not b... | 0 | 0 | 45.0 | 217955265 | POINT (-121.98609 47.74068) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 53033032401 |
| 112630 | 1N4BZ1CP7K | San Juan | Friday Harbor | WA | 98250 | 2019 | NISSAN | LEAF | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 150 | 0 | 40.0 | 103663227 | POINT (-123.01648 48.53448) | BONNEVILLE POWER ADMINISTRATION||ORCAS POWER &... | 53055960301 |
| 112631 | 1FMCU0KZ4N | King | Vashon | WA | 98070 | 2022 | FORD | ESCAPE | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 38 | 0 | 34.0 | 193878387 | POINT (-122.4573 47.44929) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 53033027702 |
| 112632 | KNDCD3LD4J | King | Covington | WA | 98042 | 2018 | KIA | NIRO | Plug-in Hybrid Electric Vehicle (PHEV) | Not eligible due to low battery range | 26 | 0 | 47.0 | 125039043 | POINT (-122.09124 47.33778) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 53033032007 |
| 112633 | YV4BR0CL8N | King | Covington | WA | 98042 | 2022 | VOLVO | XC90 | Plug-in Hybrid Electric Vehicle (PHEV) | Not eligible due to low battery range | 18 | 0 | 47.0 | 194673692 | POINT (-122.09124 47.33778) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 53033032005 |
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# columns; the quartiles are spelled out explicitly and match the defaults.
p.describe(percentiles=[0.25, 0.5, 0.75])
| | Postal Code | Model Year | Electric Range | Base MSRP | Legislative District | DOL Vehicle ID | 2020 Census Tract |
|---|---|---|---|---|---|---|---|
| count | 112634.000000 | 112634.000000 | 112634.000000 | 112634.000000 | 112348.000000 | 1.126340e+05 | 1.126340e+05 |
| mean | 98156.226850 | 2019.003365 | 87.812987 | 1793.439681 | 29.805604 | 1.994567e+08 | 5.296650e+10 |
| std | 2648.733064 | 2.892364 | 102.334216 | 10783.753486 | 14.700545 | 9.398427e+07 | 1.699104e+09 |
| min | 1730.000000 | 1997.000000 | 0.000000 | 0.000000 | 1.000000 | 4.777000e+03 | 1.101001e+09 |
| 25% | 98052.000000 | 2017.000000 | 0.000000 | 0.000000 | 18.000000 | 1.484142e+08 | 5.303301e+10 |
| 50% | 98119.000000 | 2020.000000 | 32.000000 | 0.000000 | 34.000000 | 1.923896e+08 | 5.303303e+10 |
| 75% | 98370.000000 | 2022.000000 | 208.000000 | 0.000000 | 43.000000 | 2.191899e+08 | 5.305307e+10 |
| max | 99701.000000 | 2023.000000 | 337.000000 | 845000.000000 | 49.000000 | 4.792548e+08 | 5.603300e+10 |
# ------------- Univariate Analysis -------------
# 1. Distribution of Model Year
# Model Year holds whole years (1997-2023 in this dataset), so it is binned as
# a discrete variable: one unit-width bar centred on each year. A fixed
# bins=30 over that span yields bin edges that do not line up with whole
# years, which leaves some bins empty and shows gaps that are not in the data.
plt.figure(figsize=(10, 6))
sns.histplot(p['Model Year'], kde=True, discrete=True, color='green')
plt.title('Distribution of Model Year')
plt.xlabel('Model Year')
plt.ylabel('Count')
plt.show()
C:\Users\sadgu\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
# 2. Distribution of Electric Vehicle Type
# Bar chart with one bar per vehicle type, showing how many rows fall in each.
plt.figure(figsize=(10, 6))
ax = sns.countplot(x='Electric Vehicle Type', data=p, palette='Set3')
ax.set(
    title='Count of Electric Vehicle Types',
    xlabel='Electric Vehicle Type',
    ylabel='Count',
)
# Tilt the category labels so the long type names do not overlap.
plt.xticks(rotation=45)
plt.show()
# 3. Distribution of Electric Range
# 30-bin histogram of the range column with a KDE curve overlaid.
plt.figure(figsize=(10, 6))
ax = sns.histplot(data=p, x='Electric Range', bins=30, kde=True, color='red')
ax.set_title('Distribution of Electric Range')
ax.set_xlabel('Electric Range (miles)')
ax.set_ylabel('Count')
plt.show()
C:\Users\sadgu\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
# ------------- Bivariate Analysis -------------
# 4. Electric Range vs. Model Year
# One point per row, coloured by vehicle type.
plt.figure(figsize=(10, 6))
ax = sns.scatterplot(
    data=p,
    x='Model Year',
    y='Electric Range',
    hue='Electric Vehicle Type',
    palette='coolwarm',
)
ax.set(
    title='Electric Range vs. Model Year',
    xlabel='Model Year',
    ylabel='Electric Range (miles)',
)
ax.legend(title='Electric Vehicle Type')
plt.show()
# 5. Electric Range by Make
# One box per manufacturer, summarising that make's range distribution.
plt.figure(figsize=(10, 6))
ax = sns.boxplot(x='Make', y='Electric Range', data=p, palette='Set1')
ax.set_title('Electric Range by Vehicle Make')
ax.set_xlabel('Make')
ax.set_ylabel('Electric Range (miles)')
# Vertical labels: there are many makes along the x axis.
plt.xticks(rotation=90)
plt.show()
# 6. Base MSRP vs Electric Range
# One point per row, coloured by vehicle type.
plt.figure(figsize=(10, 6))
ax = sns.scatterplot(
    data=p,
    x='Base MSRP',
    y='Electric Range',
    hue='Electric Vehicle Type',
    palette='viridis',
)
ax.set(
    title='Base MSRP vs Electric Range',
    xlabel='Base MSRP (USD)',
    ylabel='Electric Range (miles)',
)
ax.legend(title='Electric Vehicle Type')
plt.show()
# 7. Count of Clean Alternative Fuel Vehicle Eligibility
# Bar chart with one bar per CAFV eligibility category.
plt.figure(figsize=(10, 6))
ax = sns.countplot(
    x='Clean Alternative Fuel Vehicle (CAFV) Eligibility',
    data=p,
    palette='Set1',
)
ax.set(title='Count of CAFV Eligibility', xlabel='CAFV Eligibility', ylabel='Count')
# Tilt the long category labels so they stay readable.
plt.xticks(rotation=45)
plt.show()
# Missing Data Check
# Count the null entries in every column and print the per-column totals.
missing_per_column = p.isna().sum()
print(missing_per_column)
VIN (1-10) 0 County 0 City 0 State 0 Postal Code 0 Model Year 0 Make 0 Model 20 Electric Vehicle Type 0 Clean Alternative Fuel Vehicle (CAFV) Eligibility 0 Electric Range 0 Base MSRP 0 Legislative District 286 DOL Vehicle ID 0 Vehicle Location 24 Electric Utility 443 2020 Census Tract 0 dtype: int64
import seaborn as sns
import matplotlib.pyplot as plt
# Heatmap to visualize missing data
# Each cell of the boolean null-mask is drawn as a coloured tile, so columns
# containing nulls show up as streaks; the colour bar is suppressed.
plt.figure(figsize=(10, 6))
null_mask = p.isna()
ax = sns.heatmap(null_mask, cmap='viridis', cbar=False)
ax.set_title('Missing Data Heatmap')
plt.show()
# Boxplot for outlier detection in Electric Range
# A single vertical box over the whole column.
plt.figure(figsize=(10, 6))
ax = sns.boxplot(y='Electric Range', data=p)
ax.set_title('Boxplot of Electric Range (Outlier Detection)')
plt.show()
# Boxplot for outlier detection in Base MSRP
# A single vertical box over the whole column.
plt.figure(figsize=(10, 6))
ax = sns.boxplot(y='Base MSRP', data=p)
ax.set_title('Boxplot of Base MSRP (Outlier Detection)')
plt.show()
# Correlation matrix and heatmap
# Pairwise correlations between three numeric columns, drawn as an annotated
# heatmap with thin separators between the cells.
plt.figure(figsize=(10, 6))
numeric_columns = ['Electric Range', 'Model Year', 'Base MSRP']
corr_matrix = p[numeric_columns].corr()
ax = sns.heatmap(corr_matrix, cmap='coolwarm', annot=True, linewidths=0.5)
ax.set_title('Correlation Heatmap')
plt.show()
# Boxplot for Electric Range by CAFV Eligibility
# One box per eligibility category.
plt.figure(figsize=(10, 6))
ax = sns.boxplot(
    x='Clean Alternative Fuel Vehicle (CAFV) Eligibility',
    y='Electric Range',
    data=p,
)
ax.set(
    title='Electric Range by CAFV Eligibility',
    xlabel='CAFV Eligibility',
    ylabel='Electric Range (miles)',
)
plt.xticks(rotation=45)
plt.show()
# Electric Range by City (Top 10 Cities)
# Keep only the rows whose city is among the ten most frequent ones, then draw
# one box per city.
city_counts = p['City'].value_counts()
top_cities = city_counts.nlargest(10).index
in_top_city = p['City'].isin(top_cities)
filtered_data = p[in_top_city]

plt.figure(figsize=(10, 6))
ax = sns.boxplot(x='City', y='Electric Range', data=filtered_data, palette='Set3')
ax.set(
    title='Electric Range by City (Top 10 Cities)',
    xlabel='City',
    ylabel='Electric Range (miles)',
)
plt.xticks(rotation=90)
plt.show()
!pip install plotly
Requirement already satisfied: plotly in c:\users\sadgu\anaconda3\lib\site-packages (5.9.0) Requirement already satisfied: tenacity>=6.2.0 in c:\users\sadgu\anaconda3\lib\site-packages (from plotly) (8.2.2)
import plotly.express as px
# Interactive scatter of MSRP against electric range, one point per row.
scatter_plot = px.scatter(
    data_frame=p,
    x="Electric Range",
    y="Base MSRP",
    title="Scatter Plot: Electric Range vs Base MSRP",
)
scatter_plot.show()
# Interactive box plot of electric range, one box per vehicle type.
box_plot = px.box(
    data_frame=p,
    x="Electric Vehicle Type",
    y="Electric Range",
    title="Box Plot: Electric Vehicle Type vs Electric Range",
)
box_plot.show()
# Tabulate the number of rows per vehicle type as a two-column frame
# ('Electric Vehicle Type', 'Count') and show the shares as a pie chart.
vehicle_type_count = (
    p['Electric Vehicle Type']
    .value_counts()
    .rename_axis('Electric Vehicle Type')
    .reset_index(name='Count')
)
pie_chart = px.pie(
    data_frame=vehicle_type_count,
    names='Electric Vehicle Type',
    values='Count',
    title="Pie Chart: Distribution of Electric Vehicle Types",
)
pie_chart.show()
# Count the rows per state as a two-column frame ('State', 'Vehicle Count') and
# colour a US map by that count; states are matched by their two-letter codes.
vehicle_count_by_state = (
    p['State']
    .value_counts()
    .rename_axis('State')
    .reset_index(name='Vehicle Count')
)
choropleth = px.choropleth(
    data_frame=vehicle_count_by_state,
    locations="State",
    locationmode="USA-states",
    color="Vehicle Count",
    scope="usa",
    title="Choropleth Map: Number of EV Vehicles by State",
)
choropleth.show()
# Animated choropleth: mean electric range per state, one frame per model year.
# The source table has one row per vehicle, so it is first reduced to a single
# value per (Model Year, State). Feeding the raw rows hands every state many
# conflicting colour values within a frame and makes the figure needlessly
# heavy. The grouped result is also sorted by year, so the animation slider
# runs chronologically instead of following the row order of the CSV.
mean_range_by_state_year = (
    p.groupby(["Model Year", "State"], as_index=False, sort=True)["Electric Range"]
    .mean()
)
animated_choropleth = px.choropleth(
    mean_range_by_state_year,
    locations="State",
    locationmode="USA-states",
    color="Electric Range",
    animation_frame="Model Year",
    scope="usa",
    # The colour scale now shows a per-state average, so label it as such.
    labels={"Electric Range": "Mean Electric Range (miles)"},
    title="Animated Choropleth: Electric Range over Model Year by State",
)
animated_choropleth.show()
C:\Users\sadgu\anaconda3\Lib\site-packages\plotly\express\_core.py:1979: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
!pip install bar_chart_race
Collecting bar_chart_race Obtaining dependency information for bar_chart_race from https://files.pythonhosted.org/packages/09/01/f6d1a1a0978b39560843c54be7349804d7d2faef0a869acd7c8a6fc920b0/bar_chart_race-0.1.0-py3-none-any.whl.metadata Downloading bar_chart_race-0.1.0-py3-none-any.whl.metadata (4.2 kB) Requirement already satisfied: pandas>=0.24 in c:\users\sadgu\anaconda3\lib\site-packages (from bar_chart_race) (2.2.3) Requirement already satisfied: matplotlib>=3.1 in c:\users\sadgu\anaconda3\lib\site-packages (from bar_chart_race) (3.7.2) Requirement already satisfied: contourpy>=1.0.1 in c:\users\sadgu\anaconda3\lib\site-packages (from matplotlib>=3.1->bar_chart_race) (1.2.1) Requirement already satisfied: cycler>=0.10 in c:\users\sadgu\anaconda3\lib\site-packages (from matplotlib>=3.1->bar_chart_race) (0.11.0) Requirement already satisfied: fonttools>=4.22.0 in c:\users\sadgu\anaconda3\lib\site-packages (from matplotlib>=3.1->bar_chart_race) (4.25.0) Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\sadgu\anaconda3\lib\site-packages (from matplotlib>=3.1->bar_chart_race) (1.4.4) Requirement already satisfied: numpy>=1.20 in c:\users\sadgu\anaconda3\lib\site-packages (from matplotlib>=3.1->bar_chart_race) (1.24.3) Requirement already satisfied: packaging>=20.0 in c:\users\sadgu\anaconda3\lib\site-packages (from matplotlib>=3.1->bar_chart_race) (23.1) Requirement already satisfied: pillow>=6.2.0 in c:\users\sadgu\anaconda3\lib\site-packages (from matplotlib>=3.1->bar_chart_race) (9.4.0) Requirement already satisfied: pyparsing<3.1,>=2.3.1 in c:\users\sadgu\anaconda3\lib\site-packages (from matplotlib>=3.1->bar_chart_race) (3.0.9) Requirement already satisfied: python-dateutil>=2.7 in c:\users\sadgu\anaconda3\lib\site-packages (from matplotlib>=3.1->bar_chart_race) (2.8.2) Requirement already satisfied: pytz>=2020.1 in c:\users\sadgu\anaconda3\lib\site-packages (from pandas>=0.24->bar_chart_race) (2023.3.post1) Requirement already satisfied: 
tzdata>=2022.7 in c:\users\sadgu\anaconda3\lib\site-packages (from pandas>=0.24->bar_chart_race) (2023.3) Requirement already satisfied: six>=1.5 in c:\users\sadgu\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib>=3.1->bar_chart_race) (1.16.0) Downloading bar_chart_race-0.1.0-py3-none-any.whl (156 kB) ---------------------------------------- 0.0/156.8 kB ? eta -:--:-- ------------------ --------------------- 71.7/156.8 kB 4.1 MB/s eta 0:00:01 ------------------ --------------------- 71.7/156.8 kB 4.1 MB/s eta 0:00:01 ------------------ --------------------- 71.7/156.8 kB 4.1 MB/s eta 0:00:01 ---------------------- ---------------- 92.2/156.8 kB 655.4 kB/s eta 0:00:01 ---------------------- ---------------- 92.2/156.8 kB 655.4 kB/s eta 0:00:01 -------------------------------------- 156.8/156.8 kB 672.4 kB/s eta 0:00:00 Installing collected packages: bar_chart_race Successfully installed bar_chart_race-0.1.0
# Requires the dataset to be loaded into `p` already.
# Build a Model Year x Make table of row counts (0 where a make has no rows
# for a year).
pivot_data = p.pivot_table(
    index="Model Year",
    columns="Make",
    aggfunc="size",
    fill_value=0,
)
# Reorder the make columns from the largest overall count to the smallest.
make_totals = pivot_data.sum(axis=0).sort_values(ascending=False)
pivot_data = pivot_data[make_totals.index]
import bar_chart_race as bcr
# Rebuild the Model Year x Make count table from scratch (0 where a make has no
# rows for a year).
# NOTE(review): this replaces any earlier `pivot_data`, including a column
# ordering applied to it before this point - confirm that is intended.
pivot_data = p.pivot_table(
    index="Model Year",
    columns="Make",
    aggfunc="size",
    fill_value=0,
)
# Turn the 'Model Year' index into an ordinary column so it can be an id column.
pivot_data = pivot_data.reset_index()
# Long format: one row per (Model Year, Make) pair with its count.
melted_data = pd.melt(
    pivot_data,
    id_vars=["Model Year"],
    var_name="Make",
    value_name="Count",
)

# Horizontal animated bar chart, one frame per model year. The x range is fixed
# slightly above the overall maximum so the axis does not rescale per frame.
count_axis_max = melted_data['Count'].max() + 10
fig = px.bar(
    data_frame=melted_data,
    x='Count',
    y='Make',
    color='Make',
    animation_frame='Model Year',
    range_x=[0, count_axis_max],
    title='Year-wise EV Make Sales Animation',
    orientation='h',
)
fig.update_layout(
    title_font={"size": 30},
    xaxis_title_font={"size": 20},
    yaxis_title_font={"size": 20},
    width=1000,
    height=600,
    bargap=0.1,
)